########################################################
## PROGRAM NAME: 001_inputs.R                         ##
## AUTHOR: MATT MLECZKO                               ##
## DATE CREATED: 04/26/2021                           ##
## INPUTS:                                            ##
##    LTDB 1980 to 2010 boundaries                    ##
##    LTDB 1990 to 2010 boundaries                    ##
##    LTDB 2000 to 2010 boundaries                    ##
##    LTDB 2020 to 2020 boundaries                    ##
##    County to MSA crosswalks from Census            ##
##    Zipcode to tract and MSA crosswalks from HUD    ##
##    Zipcode to MSA crosswalk from MCDC              ##
##    Tract to place crosswalk from MCDC              ## 
##    2022 CT tract crosswalk from CTDC               ##
##    MSA delineation file (list1) from Census        ##
##    social_capital_zip.csv                          ##
##                                                    ##
##    TRACT LEVEL                                     ##
##    1980 and 1990 Census                            ##
##    Census 2000 short and long form data            ##
##    Census 2010                                     ##
##    ACS 2008-2012 five-year estimates               ##
##    Census 2020                                     ##
##    ACS 2018-2022                                   ##
##                                                    ##
##    COUNTY LEVEL                                    ##
##    Census 2000 short and long form data            ##
##    Census 2010                                     ##
##    ACS 2008-2012 five-year estimates               ##
##    Census 2020                                     ##
##    ACS 2018-2022                                   ##
##                                                    ##
## OUTPUTS:                                           ##
##    001_1980_trts.Rda                               ##
##    001_1990_trts.Rda                               ##
##    001_2000_counties_cb.Rda                        ##
##    001_2000_trts_cb.Rda                            ##
##    001_2010_counties_cb.Rda                        ##
##    001_2010_trts_cb.Rda                            ##
##    001_2020_Census_del.Rda                         ##
##    001_2020_counties_cb.Rda                        ##
##    001_2020_trts_cb.Rda                            ##
##    001_county_to_msa_2000.Rda                      ##
##    001_county_to_msa_2010.Rda                      ##
##    001_county_to_msa_2020.Rda                      ##
##    001_ltdb_80t10.Rda                              ##
##    001_ltdb_90t10.Rda                              ##
##    001_ltdb_00t10.Rda                              ##
##    001_ltdb_20t10.Rda                              ##
##    001_trt_t_pl.Rda                                ##
##    001_ziptrt_xwalk.Rda                            ##
##    001_zipmsa_xwalk.Rda                            ##
##    001_sc.Rda                                      ##
##                                                    ## 
## PURPOSE: Process input data                        ##
##                                                    ##
## LIST OF UPDATES:                                   ##
##                                                    ##
########################################################

log <- file("001_inputs.txt") 
sink(log, append=TRUE)
sink(log, append=TRUE, type="message")

## load libraries ##

library(tidycensus)
library(tigris)
library(tidyverse)
library(foreign)
library(stringr)
library(tm)
library(gdata)
library(readxl)
library(sf)
library(haven)
library(data.table)

## define paths ##
input_path <- "PATH TO RAW DATA HERE"
output_path <- "PATH TO OUTPUT DATA HERE"
progs <- "PATH TO PROGRAMS HERE"

## set working directory ##
setwd(input_path)

## load necessary functions ## 
`%notin%` <- Negate(`%in%`)
source(paste0(progs,"stata_merge.R"))

## input Census key ##
census_api_key("INPUT YOUR CENSUS API KEY HERE")

## states to loop through ##
states <- c("AL","AK","AZ","AR","CA","CO","CT","DE",
            "DC","FL","GA","HI","ID","IL","IN","IA","KS",
            "KY","LA","ME","MD","MA","MI","MN","MS","MO",
            "MT","NE","NV","NH","NJ","NM","NY","NC","ND",
            "OH","OK","OR","PA","RI","SC","SD","TN","TX",
            "UT","VT","VA","WA","WV","WI","WY")

##
## load social capital data for later ##
##

sc.in <- read.csv("social_capital_zip.csv",
                  header = T,
                  stringsAsFactors = F)

sc.in$ZIP <- str_pad(sc.in$zip, 5, "left","0")

## save file ##
save(sc.in,
     file= paste0(output_path,
                  "/001_sc.Rda"))

##
## load and process LTDB files ##
##

## 1980 to 2010 boundaries ## 
ltdb.80t10.in <- read.csv("crosswalk_1980_2010.csv",
                          header=T,
                          stringsAsFactors = F)

ltdb.80t10 <- ltdb.80t10.in %>%
  mutate(trtid80_ch = str_pad(trtid80, 11, "left","0"),
         trtid10_ch = str_pad(trtid10, 11, "left","0")) %>%
  select(-trtid80,
         -trtid10) %>%
  rename(trtid80 = trtid80_ch,
         trtid10 = trtid10_ch) %>%
  mutate(GEOID = trtid80) %>%
  select(trtid80,
         trtid10,
         GEOID,
         everything())

paste("1980 GEOID CHECK")
range(nchar(trim(ltdb.80t10$trtid80)))
paste("2010 GEOID CHECK")
range(nchar(trim(ltdb.80t10$trtid10)))
paste("1980 CLONE GEOID CHECK")
range(nchar(trim(ltdb.80t10$GEOID)))

## save file ##
save(ltdb.80t10,
     file= paste0(output_path,
                  "/001_ltdb_80t10.Rda"))

## 1990 to 2010 boundaries ## 
ltdb.90t10.in <- read.csv("crosswalk_1990_2010.csv",
                          header=T,
                          stringsAsFactors = F)

ltdb.90t10 <- ltdb.90t10.in %>%
  mutate(trtid90_ch = str_pad(trtid90, 11, "left","0"),
         trtid10_ch = str_pad(trtid10, 11, "left","0")) %>%
  select(-trtid90,
         -trtid10) %>%
  rename(trtid90 = trtid90_ch,
         trtid10 = trtid10_ch) %>%
  mutate(GEOID = trtid90) %>%
  select(trtid90,
         trtid10,
         GEOID,
         everything())

paste("1990 GEOID CHECK")
range(nchar(trim(ltdb.90t10$trtid90)))
paste("2010 GEOID CHECK")
range(nchar(trim(ltdb.90t10$trtid10)))
paste("1990 CLONE GEOID CHECK")
range(nchar(trim(ltdb.90t10$GEOID)))

## save file ##
save(ltdb.90t10,
     file= paste0(output_path,
                  "/001_ltdb_90t10.Rda"))


## 2000 to 2010 boundaries ## 
ltdb.00t10.in <- read.csv("crosswalk_2000_2010.csv",
                          header=T,
                          stringsAsFactors = F)

ltdb.00t10 <- ltdb.00t10.in %>%
  mutate(trtid00_ch = str_pad(trtid00, 11, "left","0"),
         trtid10_ch = str_pad(trtid10, 11, "left","0")) %>%
  select(-trtid00,
         -trtid10) %>%
  rename(trtid00 = trtid00_ch,
         trtid10 = trtid10_ch) %>%
  mutate(GEOID = trtid00) %>%
  select(trtid00,
         trtid10,
         GEOID,
         everything())

paste("2000 GEOID CHECK")
range(nchar(trim(ltdb.00t10$trtid00)))
paste("2010 GEOID CHECK")
range(nchar(trim(ltdb.00t10$trtid10)))
paste("2000 CLONE GEOID CHECK")
range(nchar(trim(ltdb.00t10$GEOID)))

## save file ##
save(ltdb.00t10,
     file= paste0(output_path,
                  "/001_ltdb_00t10.Rda"))

## 2020 to 2010 boundaries ## 
ltdb.20t10.in <- read.table("trtxwalk20102020.txt",
                            sep = "|",
                            header = T,
                            stringsAsFactors= F)

ltdb.20t10 <- ltdb.20t10.in %>%
  mutate(GEOID10 = str_pad(trim(GEOID_TRACT_10), 11, "left", "0"),
         GEOID20 = str_pad(trim(GEOID_TRACT_20), 11, "left", "0"),
         GEOID = GEOID20,
         landarea10 = as.numeric(AREALAND_TRACT_10),
         waterarea10 = as.numeric(AREAWATER_TRACT_10),
         landarea20 = as.numeric(AREALAND_TRACT_20),
         waterarea20 = as.numeric(AREAWATER_TRACT_20),
         landport = as.numeric(AREALAND_PART),
         waterport = as.numeric(AREAWATER_PART),
         totarea10 = rowSums(cbind(landarea10,waterarea10),na.rm=T),
         totarea20 = rowSums(cbind(landarea20,waterarea20),na.rm=T),
         portareatot = rowSums(cbind(landport,waterport),na.rm=T),
         wt = landport/landarea20) %>%
  select(GEOID10,
         GEOID20,
         GEOID,
         wt)

paste("2020 GEOID CHECK")
range(nchar(trim(ltdb.20t10$GEOID10)))
paste("2010 GEOID CHECK")
range(nchar(trim(ltdb.20t10$GEOID20)))
paste("2020 CLONE GEOID CHECK")
range(nchar(trim(ltdb.20t10$GEOID)))

## save file ##
save(ltdb.20t10,
     file= paste0(output_path,
                 "/001_ltdb_20t10.Rda"))

## 
## tract to zip xwalk ##
##

ziptrt.xwalk.in <- read_excel("zip_tract_062022.xlsx")
ziptrt.xwalk.in$GEOID <- ziptrt.xwalk.in$TRACT

## save file ##
save(ziptrt.xwalk.in,
     file= paste0(output_path,
                  "/001_ziptrt_xwalk.Rda"))

##
## zip to MSA xwalk ##
##

zip.msa.xwalk.in <- read.csv("zip_cbsa_xwalk.csv",
                             header = T,
                             stringsAsFactors = F)

zip.msa.xwalk <- zip.msa.xwalk.in %>%
  filter(ZIPName %notin% c("ZIP Code name",
                           "[not in a ZCTA]")) %>%
  rename(ZIP = zcta,
         CBSA = cbsa20)

## save file ##
save(zip.msa.xwalk,
     file= paste0(output_path,
                  "/001_zipmsa_xwalk.Rda"))

##
## load and process county-to-MSA crosswalks ##
##

## 2000 ##
county.t.msa.2000.in <- fread("county_to_msa_2000.csv",
                              header=TRUE, 
                              sep=",")

county.t.msa.2000 <- county.t.msa.2000.in %>%
  filter(cbsa10 != "CBSA (2010)") %>%
  mutate(FIPS = str_pad(county, 5, "left", "0")) %>%
  select(cbsa10,
         FIPS,
         afact) %>%
  rename(CBSA = cbsa10)

print("2000 CBSA CODE CHECK")
range(nchar(trim(county.t.msa.2000$CBSA)))
print("2000 FIPS CODE CHECK")
range(nchar(trim(county.t.msa.2000$FIPS)))

## 2010 ##
county.t.msa.2010.in <- fread("county_to_msa_2010.csv",
                              header=TRUE, 
                              sep=",")

county.t.msa.2010 <- county.t.msa.2010.in %>%
  filter(cbsa10 != "CBSA (2010)") %>%
  mutate(FIPS = str_pad(county, 5, "left", "0")) %>%
  select(cbsa10,
         FIPS,
         afact) %>%
  rename(CBSA = cbsa10)

print("2010 CBSA CODE CHECK")
range(nchar(trim(county.t.msa.2010$CBSA)))
print("2010 FIPS CODE CHECK")
range(nchar(trim(county.t.msa.2010$FIPS)))

## 2020 ##
county.t.msa.2020.in <- fread("county_to_msa_2020.csv",
                              header=TRUE, 
                              sep=",")

county.t.msa.2020 <- county.t.msa.2020.in %>%
  filter(cbsa10 != "CBSA (2010)") %>%
  mutate(FIPS = str_pad(county, 5, "left", "0")) %>%
  select(cbsa10,
         FIPS,
         afact) %>%
  rename(CBSA = cbsa10)

print("2020 CBSA CODE CHECK")
range(nchar(trim(county.t.msa.2020$CBSA)))
print("2020 FIPS CODE CHECK")
range(nchar(trim(county.t.msa.2020$FIPS)))

##
## fix FIPS issues ## 
##

fips.list <- list(county.t.msa.2000,
                  county.t.msa.2010,
                  county.t.msa.2020)

fix.fips <- function(x){

    x$CBSA[x$FIPS == "01107"] <- "46220"
    x$CBSA[x$FIPS == "01129"] <- "33660"
    x$CBSA[x$FIPS == "04025"] <- "39150"
    x$CBSA[x$FIPS == "05081"] <- "45500"
    x$CBSA[x$FIPS == "06037"] <- "31080"
    x$CBSA[x$FIPS == "06059"] <- "31080"
    x$CBSA[x$FIPS == "06083"] <- "42200"
    x$CBSA[x$FIPS == "09015"] <- "49340"
    x$CBSA[x$FIPS == "10005"] <- "41540"
    x$CBSA[x$FIPS == "12035"] <- "19660"
    x$CBSA[x$FIPS == "12075"] <- "23540"
    x$CBSA[x$FIPS == "12131"] <- "18880"
    x$CBSA[x$FIPS == "13181"] <- "12260"
    x$CBSA[x$FIPS == "13211"] <- "12060"
    x$CBSA[x$FIPS == "13225"] <- "47580"
    x$CBSA[x$FIPS == "13259"] <- "17980"
    x$CBSA[x$FIPS == "13263"] <- "17980"
    x$CBSA[x$FIPS == "15003"] <- "46520"
    x$CBSA[x$FIPS == "16023"] <- "26820"
    x$CBSA[x$FIPS == "17057"] <- "37900"
    x$CBSA[x$FIPS == "17087"] <- "16060"
    x$CBSA[x$FIPS == "17113"] <- "14010"
    x$CBSA[x$FIPS == "17199"] <- "16060"
    x$CBSA[x$FIPS == "18007"] <- "29200"
    x$CBSA[x$FIPS == "18015"] <- "29200"
    x$CBSA[x$FIPS == "18095"] <- "26900"
    x$CBSA[x$FIPS == "18121"] <- "45460"
    x$CBSA[x$FIPS == "18157"] <- "29200"
    x$CBSA[x$FIPS == "18161"] <- "17140"
    x$CBSA[x$FIPS == "18171"] <- "29200"
    x$CBSA[x$FIPS == "19015"] <- "11180"
    x$CBSA[x$FIPS == "19099"] <- "19780"
    x$CBSA[x$FIPS == "21003"] <- "14540"
    x$CBSA[x$FIPS == "21031"] <- "14540"
    x$CBSA[x$FIPS == "21043"] <- "26580"
    x$CBSA[x$FIPS == "21163"] <- "21060"
    x$CBSA[x$FIPS == "26139"] <- "24340"
    x$CBSA[x$FIPS == "22001"] <- "29180"
    x$CBSA[x$FIPS == "22007"] <- "12940"
    x$CBSA[x$FIPS == "22045"] <- "29180"
    x$CBSA[x$FIPS == "22067"] <- "33740"
    x$CBSA[x$FIPS == "22093"] <- "35380"
    x$CBSA[x$FIPS == "22113"] <- "29180"
    x$CBSA[x$FIPS == "24037"] <- "15680"
    x$CBSA[x$FIPS == "24047"] <- "41540"
    x$CBSA[x$FIPS == "26117"] <- "24340"
    x$CBSA[x$FIPS == "26139"] <- "24340"
    x$CBSA[x$FIPS == "26155"] <- "29620"
    x$CBSA[x$FIPS == "27045"] <- "40340"
    x$CBSA[x$FIPS == "27075"] <- "20260"
    x$CBSA[x$FIPS == "27079"] <- "33460"
    x$CBSA[x$FIPS == "27095"] <- "33460"
    x$CBSA[x$FIPS == "28031"] <- "25620"
    x$CBSA[x$FIPS == "28051"] <- "27140"
    x$CBSA[x$FIPS == "28059"] <- "25060"
    x$CBSA[x$FIPS == "28163"] <- "27140"
    x$CBSA[x$FIPS == "29053"] <- "17860"
    x$CBSA[x$FIPS == "30095"] <- "13740"
    x$CBSA[x$FIPS == "36123"] <- "40380"
    x$CBSA[x$FIPS == "37019"] <- "34820"
    x$CBSA[x$FIPS == "37029"] <- "47260"
    x$CBSA[x$FIPS == "37057"] <- "49180"
    x$CBSA[x$FIPS == "37073"] <- "47260"
    x$CBSA[x$FIPS == "37077"] <- "20500"
    x$CBSA[x$FIPS == "37085"] <- "22180"
    x$CBSA[x$FIPS == "37097"] <- "16740"
    x$CBSA[x$FIPS == "37109"] <- "16740"
    x$CBSA[x$FIPS == "37159"] <- "16740"
    x$CBSA[x$FIPS == "38065"] <- "13900"
    x$CBSA[x$FIPS == "39057"] <- "19430"
    x$CBSA[x$FIPS == "39073"] <- "18140"
    x$CBSA[x$FIPS == "39081"] <- "48260"
    x$CBSA[x$FIPS == "39109"] <- "19430"
    x$CBSA[x$FIPS == "39113"] <- "19430"
    x$CBSA[x$FIPS == "39127"] <- "18140"
    x$CBSA[x$FIPS == "40033"] <- "30020"
    x$CBSA[x$FIPS == "45007"] <- "24860"
    x$CBSA[x$FIPS == "45023"] <- "16740"
    x$CBSA[x$FIPS == "45027"] <- "44940"
    x$CBSA[x$FIPS == "45057"] <- "16740"
    x$CBSA[x$FIPS == "47013"] <- "28940"
    x$CBSA[x$FIPS == "47033"] <- "27180"
    x$CBSA[x$FIPS == "47053"] <- "27180"
    x$CBSA[x$FIPS == "47119"] <- "34980"
    x$CBSA[x$FIPS == "47129"] <- "28940"
    x$CBSA[x$FIPS == "47145"] <- "28940"
    x$CBSA[x$FIPS == "48145"] <- "47380"
    x$CBSA[x$FIPS == "48203"] <- "30980"
    x$CBSA[x$FIPS == "48229"] <- "21340"
    x$CBSA[x$FIPS == "48305"] <- "31180"
    x$CBSA[x$FIPS == "48317"] <- "33260"
    x$CBSA[x$FIPS == "48359"] <- "11100"
    x$CBSA[x$FIPS == "48431"] <- "41660"
    x$CBSA[x$FIPS == "49003"] <- "36260"
    x$CBSA[x$FIPS == "51047"] <- "47900"
    x$CBSA[x$FIPS == "51113"] <- "47900"
    x$CBSA[x$FIPS == "51157"] <- "47900"
    x$CBSA[x$FIPS == "51175"] <- "47260"
    x$CBSA[x$FIPS == "51620"] <- "47260"
    x$CBSA[x$FIPS == "53065"] <- "44060"
    x$CBSA[x$FIPS == "54009"] <- "48260"
    x$CBSA[x$FIPS == "54019"] <- "13220"
    x$CBSA[x$FIPS == "54029"] <- "48260"
    x$CBSA[x$FIPS == "54035"] <- "16620"
    x$CBSA[x$FIPS == "54079"] <- "26580"
    x$CBSA[x$FIPS == "55045"] <- "31540"
    x$CBSA[x$FIPS == "55069"] <- "48140"
    
    return(x)
}    

fips.data.out <- lapply(fips.list, fix.fips)

county.t.msa.2000.final <- fips.data.out[[1]]
county.t.msa.2010.final <- fips.data.out[[2]]
county.t.msa.2020.final <- fips.data.out[[3]]

## save files ##
save(county.t.msa.2000.final,
     file= paste0(output_path,
                  "/001_county_to_msa_2000.Rda"))

save(county.t.msa.2010.final,
     file= paste0(output_path,
                  "/001_county_to_msa_2010.Rda"))

save(county.t.msa.2020.final,
     file= paste0(output_path,
                  "/001_county_to_msa_2020.Rda"))

##
## tract to place crosswalk ##
##

tract.to.pl.in <- readr::read_csv("tract_to_place.csv")

## clean up errors in the tract ID ##
tract.to.pl.rf <- tract.to.pl.in %>%
  mutate(tract.rf = str_replace_all(tract, '\\.', ''),
         fintract = str_pad(tract.rf, 6, "right", "0"))

range(nchar(trim(tract.to.pl.rf$fintract)))

## process ##
tract.to.pl <- tract.to.pl.rf %>%
  filter(placenm %in% c("New York city, NY",
                        "Newark city, NJ",
                        "Jersey City city, NJ",
                        "Los Angeles city, CA",
                        "Long Beach city, CA",
                        "Anaheim city, CA",
                        "Chicago city, IL",
                        "Dallas city, TX",
                        "Fort Worth city, TX",
                        "Houston city, TX",
                        "Washington city, DC",
                        "Philadelphia city, PA",
                        "Camden city, NJ",
                        "Wilmington city, DE",
                        "Miami city, FL",
                        "Atlanta city, GA",
                        "Boston city, MA",
                        "Phoenix city, AZ",
                        "San Francisco city, CA",
                        "Oakland city, CA",
                        "Riverside city, CA",
                        "San Bernardino city, CA",
                        "Detroit city, MI",
                        "Seattle city, WA",
                        "Minneapolis city, MN",
                        "St. Paul city, MN",
                        "San Diego city, CA",
                        "Tampa city, FL",
                        "Denver city, CO",
                        "Baltimore city, MD",
                        "St. Louis city, MO",
                        "Orlando city, FL",
                        "Charlotte city, NC",
                        "San Antonio city, TX",
                        "Portland city, OR",
                        "Sacramento city, CA",
                        "Pittsburgh city, PA",
                        "Austin city, TX",
                        "Las Vegas city, NV",
                        "Cincinnati city, OH",
                        "Kansas City city, KS",
                        "Kansas City city, MO",
                        "Columbus city, OH",
                        "Indianapolis city (balance), IN",
                        "Cleveland city, OH",
                        "San Jose city, CA",
                        "Nashville-Davidson metropolitan government (balance), TN",
                        "Virginia Beach city, VA",
                        "Providence city, RI",
                        "Jacksonville city, FL",
                        "Milwaukee city, WI",
                        "Oklahoma City city, OK",
                        "Raleigh city, NC",
                        "Louisville/Jefferson County metro government (balance), KY",
                        "Memphis city, TN",
                        "Richmond city, VA",
                        "New Orleans city, LA",
                        "Salt Lake City city, UT",
                        "Hartford city, CT",
                        "Buffalo city, NY",
                        "Birmingham city, AL",
                        "Rochester city, NY",
                        "Grand Rapids city, MI",
                        "Tucson city, AZ",
                        "Urban Honolulu CDP, HI",
                        "Tulsa city, OK",
                        "Fresno city, CA",
                        "Worcester city, MA",
                        "Omaha city, NE",
                        "Bridgeport city, CT",
                        "Stamford city, CT",
                        "Greenville city, SC",
                        "Albuquerque city, NM",
                        "Bakersfield city, CA",
                        "Albany city, NY",
                        "Knoxville city, TN",
                        "McAllen city, TX",
                        "Baton Rouge city, LA",
                        "El Paso city, TX",
                        "Allentown city, PA",
                        "New Haven city, CT",
                        "Oxnard city, CA",
                        "North Port city, FL",
                        "Sarasota city, FL",
                        "Bradenton city, FL",
                        "Columbia city, SC",
                        "Dayton city, OH",
                        "Charleston city, SC",
                        "Stockton city, CA",
                        "Greensboro city, NC",
                        "High Point city, NC",
                        "Boise City city, ID",
                        "Cape Coral city, FL",
                        "Fort Myers city, FL",
                        "Colorado Springs city, CO",
                        "Little Rock city, AR",
                        "Lakeland city, FL",
                        "Des Moines city, IA",
                        "Akron city, OH",
                        "Springfield city, MA",
                        "Poughkeepsie city, NY",
                        "Newburgh city, NY",
                        "Middletown city, NY",
                        "Ogden city, UT",
                        "Madison city, WI",
                        "Winston-Salem city, NC",
                        "Provo city, UT",
                        "Orem city, UT",
                        "Deltona city, FL",
                        "Daytona Beach city, FL",
                        "Ormond Beach city, FL",
                        "Syracuse city, NY",
                        "Durham city, NC",
                        "Chapel Hill town, NC",
                        "Wichita city, KS",
                        "Toledo city, OH",
                        "Augusta-Richmond County consolidated government (balance), GA",
                        "Palm Bay city, FL",
                        "Melbourne city, FL",
                        "Titusville city, FL",
                        "Jackson city, MS",
                        "Harrisburg city, PA",
                        "Spokane city, WA",
                        "Scranton city, PA",
                        "Chattanooga city, TN",
                        "Lancaster city, PA",
                        "Modesto city, CA",
                        "Portland city, ME",
                        "Fayetteville city, AR",
                        "Springdale city, AR",
                        "Rogers city, AR",
                        "Fayetteville city, NC",
                        "East Lansing city, MI",
                        "Lansing city, MI",
                        "Youngstown city, OH",
                        "Lexington-Fayette urban county, KY",
                        "Pensacola city, FL",
                        "Huntsville city, AL",
                        "Reno city, NV",
                        "Santa Rosa city, CA",
                        "Petaluma city, CA",
                        "Myrtle Beach city, SC",
                        "Port St. Lucie city, FL",
                        "Lafayette city, LA",
                        "Springfield city, MO",
                        "Killeen city, TX",
                        "Temple city, TX",
                        "Visalia city, CA",
                        "Asheville city, NC",
                        "York city, PA",
                        "Vallejo city, CA",
                        "Santa Maria city, CA",
                        "Santa Barbara city, CA",
                        "Salinas city, CA",
                        "Salem city, OR",
                        "Mobile city, AL",
                        "Reading city, PA",
                        "Manchester city, NH",
                        "Nashua city, NH",
                        "Corpus Christi city, TX",
                        "Brownsville city, TX",
                        "Harlingen city, TX",
                        "Fort Wayne city, IN",
                        "Salisbury city, MD",
                        "Gulfport city, MS",
                        "Biloxi city, MS",
                        "Flint city, MI",
                        "Savannah city, GA",
                        "Peoria city, IL",
                        "Canton city, OH",
                        "Anchorage municipality, AK",
                        "Beaumont city, TX",
                        "Port Arthur city, TX",
                        "Shreveport city, LA",
                        "Bossier City city, LA",
                        "Trenton city, NJ",
                        "Montgomery city, AL",
                        "Davenport city, IA",
                        "Tallahassee city, FL",
                        "Eugene city, OR",
                        "Springfield city, OR",
                        "Ocala city, FL",
                        "Naples city, FL",
                        "Ann Arbor city, MI",
                        "Hickory city, NC",
                        "Morganton city, NC",
                        "Lenoir city, NC",
                        "Huntington city, WV",
                        "Ashland city, KY",
                        "Fort Collins city, CO",
                        "Lincoln city, NE",
                        "Gainesville city, FL",
                        "Rockford city, IL",
                        "Boulder city, CO",
                        "Greeley city, CO",
                        "Columbus city, GA",
                        "Green Bay city, WI",
                        "Spartanburg city, SC",
                        "South Bend city, IN",
                        "Mishawaka city, IN",
                        "Lubbock city, TX",
                        "Clarksville city, TN",
                        "Roanoke city, VA",
                        "Evansville city, IN",
                        "Kingsport city, TN",
                        "Kennewick city, WA",
                        "Richland city, WA",
                        "Olympia city, WA",
                        "Hagerstown city, MD",
                        "Martinsburg city, WV",
                        "Utica city, NY",
                        "Duluth city, MN",
                        "Crestview city, FL",
                        "Fort Walton Beach city, FL",
                        "Destin city, FL",
                        "Longview city, TX",
                        "Wilmington city, NC",
                        "San Luis Obispo city, CA",
                        "Merced city, CA",
                        "Waco city, TX",
                        "Sioux Falls city, SD",
                        "Cedar Rapids city, IA",
                        "Bremerton city, WA",
                        "Silverdale CDP, WA",
                        "Port Orchard city, WA",
                        "Atlantic City city, NJ",
                        "Erie city, PA",
                        "Santa Cruz city, CA",
                        "Watsonville city, CA",
                        "Amarillo city, TX",
                        "Tuscaloosa city, AL",
                        "Norwich city, CT",
                        "New London city, CT",
                        "College Station city, TX",
                        "Bryan city, TX",
                        "Laredo city, TX",
                        "Kalamazoo city, MI",
                        "Lynchburg city, VA",
                        "Charleston city, WV",
                        "Yakima city, WA",
                        "Fargo city, ND",
                        "Binghamton city, NY",
                        "Fort Smith city, AR",
                        "Appleton city, WI",
                        "Prescott city, AZ",
                        "Macon city, GA",
                        "Tyler city, TX",
                        "Topeka city, KS",
                        "Daphne city, AL",
                        "Fairhope city, AL",
                        "Foley city, AL",
                        "Barnstable Town city, MA",
                        "Bellingham city, WA",
                        "Rochester city, MN",
                        "Burlington city, VT",
                        "Lafayette city, IN",
                        "Medford city, OR",
                        "Champaign city, IL",
                        "Urbana city, IL",
                        "Lake Charles city, LA",
                        "Charlottesville city, VA",
                        "Las Cruces city, NM",
                        "Hilton Head Island town, SC",
                        "Bluffton town, SC",
                        "Beaufort city, SC",
                        "Athens-Clarke County unified government (balance), GA",
                        "Lake Havasu City city, AZ",
                        "Kingman city, AZ",
                        "Chico city, CA",
                        "Columbia city, MO",
                        "Springfield city, IL",
                        "Johnson City city, TN",
                        "Houma city, LA",
                        "Thibodaux city, LA",
                        "Monroe city, LA",
                        "Elkhart city, IN",
                        "Goshen city, IN",
                        "Jacksonville city, NC",
                        "Yuma city, AZ",
                        "Gainesville city, GA",
                        "Florence city, SC",
                        "St. Cloud city, MN",
                        "Bend city, OR",
                        "Racine city, WI",
                        "Warner Robins city, GA",
                        "Saginaw city, MI",
                        "Punta Gorda city, FL",
                        "Terre Haute city, IN",
                        "Billings city, MT",
                        "Redding city, CA",
                        "Dover city, DE",
                        "Kingston city, NY",
                        "Joplin city, MO",
                        "Yuba City city, CA",
                        "Jackson city, TN",
                        "St. George city, UT",
                        "El Centro city, CA",
                        "Bowling Green city, KY",
                        "Abilene city, TX",
                        "Muskegon city, MI",
                        "Iowa City city, IA",
                        "Midland city, TX",
                        "Panama City city, FL",
                        "Auburn city, AL",
                        "Hattiesburg city, MS",
                        "Eau Claire city, WI",
                        "Oshkosh city, WI",
                        "Burlington city, NC",
                        "Coeur dAlene city, ID",
                        "Bloomington city, IL",
                        "Greenville city, NC",
                        "Cedar Falls city, IA",
                        "Waterloo city, IA",
                        "East Stroudsburg borough, PA",
                        "Pueblo city, CO",
                        "Wausau city, WI",
                        "Blacksburg town, VA",
                        "Christiansburg town, VA",
                        "Odessa city, TX",
                        "Kahului CDP, HI",
                        "Janesville city, WI",
                        "Beloit city, WI",
                        "Bloomington city, IN",
                        "Jackson city, MI",
                        "Sebastian city, FL",
                        "Vero Beach city, FL",
                        "State College borough, PA",
                        "Idaho Falls city, ID",
                        "Decatur city, AL",
                        "Madera city, CA",
                        "Chambersburg borough, PA",
                        "Waynesboro borough, PA",
                        "Grand Junction city, CO",
                        "Elizabethtown city, KY",
                        "Santa Fe city, NM",
                        "Monroe city, MI",
                        "Niles city, MI",
                        "Vineland city, NJ",
                        "Homosassa Springs CDP, FL",
                        "Hanford city, CA",
                        "Bangor city, ME",
                        "Alexandria city, LA",
                        "Dothan city, AL",
                        "Florence city, AL",
                        "Muscle Shoals city, AL",
                        "Jefferson City city, MO",
                        "Sioux City city, IA",
                        "Albany city, GA",
                        "Wichita Falls city, TX",
                        "Valdosta city, GA",
                        "Texarkana city, TX",
                        "Logan city, UT",
                        "Flagstaff city, AZ",
                        "Rocky Mount city, NC",
                        "Lebanon city, PA",
                        "Dalton city, GA",
                        "Morristown city, TN",
                        "Winchester city, VA",
                        "Morgantown city, WV",
                        "La Crosse city, WI",
                        "Wheeling city, WV",
                        "Rapid City city, SD",
                        "Napa city, CA",
                        "Sumter city, SC",
                        "Springfield city, OH",
                        "Harrisonburg city, VA",
                        "Sherman city, TX",
                        "Battle Creek city, MI",
                        "Jonesboro city, AR",
                        "Manhattan city, KS",
                        "Bismarck city, ND",
                        "Johnstown city, PA",
                        "Carbondale city, IL",
                        "Marion city, IL",
                        "Hammond city, LA",
                        "The Villages CDP, FL",
                        "Mount Vernon city, WA",
                        "Pittsfield city, MA",
                        "Albany city, OR",
                        "Glens Falls city, NY",
                        "Lawton city, OK",
                        "Cleveland city, TN",
                        "Sierra Vista city, AZ",
                        "Douglas city, AZ",
                        "Staunton city, VA",
                        "Ames city, IA",
                        "Mansfield city, OH",
                        "San Angelo city, TX",
                        "Altoona city, PA",
                        "New Bern city, NC",
                        "Wenatchee city, WA",
                        "Farmington city, NM",
                        "Owensboro city, KY",
                        "St. Joseph city, MO",
                        "Lawrence city, KS",
                        "Sheboygan city, WI",
                        "Missoula city, MT",
                        "Goldsboro city, NC",
                        "Weirton city, WV",
                        "Steubenville city, OH",
                        "Watertown city, NY",
                        "Anniston city, AL",
                        "Oxford city, AL",
                        "Beckley city, WV",
                        "Twin Falls city, ID",
                        "Williamsport city, PA",
                        "California CDP, MD",
                        "Lexington Park CDP, MD",
                        "Brunswick city, GA",
                        "Michigan City city, IN",
                        "La Porte city, IN",
                        "Muncie city, IN",
                        "Lewiston city, ME",
                        "Auburn city, ME",
                        "Longview city, WA",
                        "Kankakee city, IL",
                        "Ithaca city, NY",
                        "Grand Forks city, ND",
                        "Fond du Lac city, WI",
                        "Decatur city, IL",
                        "Bay City city, MI",
                        "Gettysburg borough, PA",
                        "Mankato city, MN",
                        "Gadsden city, AL",
                        "Lima city, OH",
                        "Avon Park city, FL",
                        "Sebring city, FL",
                        "Cheyenne city, WY",
                        "Hot Springs city, AR",
                        "Dubuque city, IA",
                        "Rome city, GA",
                        "Victoria city, TX",
                        "Cape Girardeau city, MO",
                        "Fairbanks city, AK",
                        "Ocean City city, NJ",
                        "Corvallis city, OR",
                        "Cumberland city, MD",
                        "Pocatello city, ID",
                        "Parkersburg city, WV",
                        "Vienna city, WV",
                        "Grants Pass city, OR",
                        "Pine Bluff city, AR",
                        "Great Falls city, MT",
                        "Elmira city, NY",
                        "Kokomo city, IN",
                        "Midland city, MI",
                        "Bloomsburg town, PA",
                        "Berwick borough, PA",
                        "Columbus city, IN",
                        "Hinesville city, GA",
                        "Casper city, WY",
                        "Grand Island city, NE",
                        "Danville city, IL",
                        "Lewiston city, ID",
                        "Enid city, OK",
                        "Walla Walla city, WA",
                        "Carson City, NV")) %>%
  mutate(FIPS = str_pad(county, 5, "left", "0"),
         GEOID = paste(FIPS,fintract,sep="")) %>%
  mutate(afact_num = as.numeric(afact)) %>%
  select(GEOID,
         FIPS,
         placefp,
         placenm,
         afact_num)

paste("GEOID CHECK IN TRACT TO PLACE FILE")
range(nchar(trim(tract.to.pl$GEOID)))
#summary(tract.to.pl$afact_num)

## save file ##
save(tract.to.pl,
     file= paste0(output_path,
                  "/001_trt_t_pl.Rda"))

#################
## 1980 Census ##
#################

trt1980.in <- read.csv("trt1980.csv",
                       header = T,
                       stringsAsFactors = F)

trt1980.init <- trt1980.in %>%
  mutate(sfips = str_pad(Geo_STATE,2,"left","0"),
         cfips = str_pad(Geo_COUNTY,3, "left","0"),
         tfips = str_pad(Geo_TRACT6,6, "left","0"),
         GEOID = paste0(sfips,cfips,tfips)) %>%
  rename(pop_total = SE_T001_001,
         hhs = SE_T019_001,   
         pop_nh = SE_T013_002,
         pop_nh_white = SE_T013_003,
         pop_nh_black = SE_T013_004, 
         pop_nh_asianplus = SE_T013_005,
         pop_nh_other = SE_T013_006,
         pop_hl = SE_T013_007,
         gq_den = SE_T029_001,
         gq_num = SE_T029_002) %>%
  select(GEOID,
         pop_total,
         hhs,
         pop_nh,
         pop_nh_white,
         pop_nh_black,
         pop_nh_asianplus,
         pop_nh_other,
         pop_hl,
         gq_den,
         gq_num)

save(trt1980.init,
     file= paste0(output_path,
                  "/001_1980_trts.Rda"))

#################
## 1990 Census ##
#################

trt1990.in <- read.csv("trt1990.csv",
                       header = T,
                       stringsAsFactors = F)

trt1990.init <- trt1990.in %>%
  mutate(sfips = str_pad(Geo_STATE,2,"left","0"),
         cfips = str_pad(Geo_COUNTY,3, "left","0"),
         tfips = str_pad(Geo_TRACT,6, "left","0"),
         GEOID = paste0(sfips,cfips,tfips)) %>%
  rename(pop_total = SE_T001_001,
         hhs = SE_T016_001,   
         pop_nh = SE_T013_002,
         pop_nh_white = SE_T013_003,
         pop_nh_black = SE_T013_004,
         pop_nh_aian = SE_T013_005,
         pop_nh_asianpi = SE_T013_006,
         pop_nh_other = SE_T013_007,
         pop_hl = SE_T013_008,
         gq_num = SE_T021_002,
         gq_den = SE_T021_001) %>%
  select(GEOID,
         pop_total,
         hhs,
         pop_nh,
         pop_nh_white,
         pop_nh_black,
         pop_nh_aian,
         pop_nh_asianpi,
         pop_nh_other,
         pop_hl,
         gq_num,
         gq_den)

save(trt1990.init,
     file= paste0(output_path,
                  "/001_1990_trts.Rda"))

#################
## 2000 Census ##
#################

## View Census 2000 variables ##
v2000.sf <- load_variables(2000, "sf1", cache=TRUE)
v2000.lf <- load_variables(2000, "sf3", cache=TRUE)

## initialize lists to store data frames ##
state.tracts.2000.sf <- list() 
state.tracts.2000.lf <- list() 

## initialize counter ## 
state.counter <- 1

## start the loop ##
for (st in states){
  
  ## pull relevant variables for 2000 short-form Census by state and tract ## 
  
  census2000.sf <- get_decennial(geography = "tract",
                                 variables = c(pop_total = "P001001",
                                               hisp_race_den = "P008001",
                                               pop_nh = "P008002",
                                               pop_nh_white = "P008003",
                                               pop_nh_black = "P008004",
                                               pop_nh_aian = "P008005",
                                               pop_nh_asian = "P008006",
                                               pop_nh_nhpi = "P008007",
                                               pop_nh_other = "P008008",
                                               pop_nh_multi = "P008009",
                                               pop_hl = "P008010",
                                               pop_hl_white = "P008011",
                                               pop_hl_black = "P008012",
                                               pop_hl_aian = "P008013",
                                               pop_hl_asian = "P008014",
                                               pop_hl_nhpi = "P008015",
                                               pop_hl_other = "P008016",
                                               pop_hl_multi = "P008017",
                                               gq_den = "P027001",
                                               gq_num = "P027024",
                                               gq_tot = "P037001",
                                               gq_inst_pop = "P037002",
                                               hhs = "H004001"),
                                 state = st,
                                 sumfile = "sf1",
                                 output = "wide",
                                 year = 2000)
  
  
  ## pull relevant variables for 2000 long-form Census by state and tract ## 
  
  census2000.lf <- get_decennial(geography = "tract",
                                 variables = c(median_hhld_inc = "P053001",
                                               foreign_den = "P021001",
                                               foreign_born = "P021013",
                                               hhld_pov_den = "P092001",
                                               hhld_pov_num = "P092002",
                                               hhld_inc = "P052001",
                                               hhld_inc1 = "P052002",
                                               hhld_inc2 = "P052003",
                                               hhld_inc3 = "P052004",
                                               hhld_inc4 = "P052005",
                                               hhld_inc5 = "P052006",
                                               hhld_inc6 = "P052007",
                                               hhld_inc7 = "P052008",
                                               hhld_inc8 = "P052009",
                                               hhld_inc9 = "P052010",
                                               hhld_inc10 = "P052011",
                                               hhld_inc11 = "P052012",
                                               hhld_inc12 = "P052013",
                                               hhld_inc13 = "P052014",
                                               hhld_inc14 = "P052015",
                                               hhld_inc15 = "P052016",
                                               hhld_inc16 = "P052017",
                                               wh_hhld_inc = "P151A001",
                                               wh_hhld_inc1 = "P151A002",
                                               wh_hhld_inc2 = "P151A003",
                                               wh_hhld_inc3 = "P151A004",
                                               wh_hhld_inc4 = "P151A005",
                                               wh_hhld_inc5 = "P151A006",
                                               wh_hhld_inc6 = "P151A007",
                                               wh_hhld_inc7 = "P151A008",
                                               wh_hhld_inc8 = "P151A009",
                                               wh_hhld_inc9 = "P151A010",
                                               wh_hhld_inc10 = "P151A011",
                                               wh_hhld_inc11 = "P151A012",
                                               wh_hhld_inc12 = "P151A013",
                                               wh_hhld_inc13 = "P151A014",
                                               wh_hhld_inc14 = "P151A015",
                                               wh_hhld_inc15 = "P151A016",
                                               wh_hhld_inc16 = "P151A017",
                                               bl_hhld_inc = "P151B001",
                                               bl_hhld_inc1 = "P151B002",
                                               bl_hhld_inc2 = "P151B003",
                                               bl_hhld_inc3 = "P151B004",
                                               bl_hhld_inc4 = "P151B005",
                                               bl_hhld_inc5 = "P151B006",
                                               bl_hhld_inc6 = "P151B007",
                                               bl_hhld_inc7 = "P151B008",
                                               bl_hhld_inc8 = "P151B009",
                                               bl_hhld_inc9 = "P151B010",
                                               bl_hhld_inc10 = "P151B011",
                                               bl_hhld_inc11 = "P151B012",
                                               bl_hhld_inc12 = "P151B013",
                                               bl_hhld_inc13 = "P151B014",
                                               bl_hhld_inc14 = "P151B015",
                                               bl_hhld_inc15 = "P151B016",
                                               bl_hhld_inc16 = "P151B017",
                                               aian_hhld_inc = "P151C001",
                                               aian_hhld_inc1 = "P151C002",
                                               aian_hhld_inc2 = "P151C003",
                                               aian_hhld_inc3 = "P151C004",
                                               aian_hhld_inc4 = "P151C005",
                                               aian_hhld_inc5 = "P151C006",
                                               aian_hhld_inc6 = "P151C007",
                                               aian_hhld_inc7 = "P151C008",
                                               aian_hhld_inc8 = "P151C009",
                                               aian_hhld_inc9 = "P151C010",
                                               aian_hhld_inc10 = "P151C011",
                                               aian_hhld_inc11 = "P151C012",
                                               aian_hhld_inc12 = "P151C013",
                                               aian_hhld_inc13 = "P151C014",
                                               aian_hhld_inc14 = "P151C015",
                                               aian_hhld_inc15 = "P151C016",
                                               aian_hhld_inc16 = "P151C017",
                                               as_hhld_inc = "P151D001",
                                               as_hhld_inc1 = "P151D002",
                                               as_hhld_inc2 = "P151D003",
                                               as_hhld_inc3 = "P151D004",
                                               as_hhld_inc4 = "P151D005",
                                               as_hhld_inc5 = "P151D006",
                                               as_hhld_inc6 = "P151D007",
                                               as_hhld_inc7 = "P151D008",
                                               as_hhld_inc8 = "P151D009",
                                               as_hhld_inc9 = "P151D010",
                                               as_hhld_inc10 = "P151D011",
                                               as_hhld_inc11 = "P151D012",
                                               as_hhld_inc12 = "P151D013",
                                               as_hhld_inc13 = "P151D014",
                                               as_hhld_inc14 = "P151D015",
                                               as_hhld_inc15 = "P151D016",
                                               as_hhld_inc16 = "P151D017",
                                               nhpi_hhld_inc =  "P151E001",
                                               nhpi_hhld_inc1 = "P151E002",
                                               nhpi_hhld_inc2 = "P151E003",
                                               nhpi_hhld_inc3 = "P151E004",
                                               nhpi_hhld_inc4 = "P151E005",
                                               nhpi_hhld_inc5 = "P151E006",
                                               nhpi_hhld_inc6 = "P151E007",
                                               nhpi_hhld_inc7 = "P151E008",
                                               nhpi_hhld_inc8 = "P151E009",
                                               nhpi_hhld_inc9 = "P151E010",
                                               nhpi_hhld_inc10 = "P151E011",
                                               nhpi_hhld_inc11 = "P151E012",
                                               nhpi_hhld_inc12 = "P151E013",
                                               nhpi_hhld_inc13 = "P151E014",
                                               nhpi_hhld_inc14 = "P151E015",
                                               nhpi_hhld_inc15 = "P151E016",
                                               nhpi_hhld_inc16 = "P151E017",
                                               oth_hhld_inc = "P151F001",
                                               oth_hhld_inc1 = "P151F002",
                                               oth_hhld_inc2 = "P151F003",
                                               oth_hhld_inc3 = "P151F004",
                                               oth_hhld_inc4 = "P151F005",
                                               oth_hhld_inc5 = "P151F006",
                                               oth_hhld_inc6 = "P151F007",
                                               oth_hhld_inc7 = "P151F008",
                                               oth_hhld_inc8 = "P151F009",
                                               oth_hhld_inc9 = "P151F010",
                                               oth_hhld_inc10 = "P151F011",
                                               oth_hhld_inc11 = "P151F012",
                                               oth_hhld_inc12 = "P151F013",
                                               oth_hhld_inc13 = "P151F014",
                                               oth_hhld_inc14 = "P151F015",
                                               oth_hhld_inc15 = "P151F016",
                                               oth_hhld_inc16 = "P151F017",
                                               tw_hhld_inc = "P151G001",
                                               tw_hhld_inc1 = "P151G002",
                                               tw_hhld_inc2 = "P151G003",
                                               tw_hhld_inc3 = "P151G004",
                                               tw_hhld_inc4 = "P151G005",
                                               tw_hhld_inc5 = "P151G006",
                                               tw_hhld_inc6 = "P151G007",
                                               tw_hhld_inc7 = "P151G008",
                                               tw_hhld_inc8 = "P151G009",
                                               tw_hhld_inc9 = "P151G010",
                                               tw_hhld_inc10 = "P151G011",
                                               tw_hhld_inc11 = "P151G012",
                                               tw_hhld_inc12 = "P151G013",
                                               tw_hhld_inc13 = "P151G014",
                                               tw_hhld_inc14 = "P151G015",
                                               tw_hhld_inc15 = "P151G016",
                                               tw_hhld_inc16 = "P151G017",
                                               hl_hhld_inc = "P151H001",
                                               hl_hhld_inc1 = "P151H002",
                                               hl_hhld_inc2 = "P151H003",
                                               hl_hhld_inc3 = "P151H004",
                                               hl_hhld_inc4 = "P151H005",
                                               hl_hhld_inc5 = "P151H006",
                                               hl_hhld_inc6 = "P151H007",
                                               hl_hhld_inc7 = "P151H008",
                                               hl_hhld_inc8 = "P151H009",
                                               hl_hhld_inc9 = "P151H010",
                                               hl_hhld_inc10 = "P151H011",
                                               hl_hhld_inc11 = "P151H012",
                                               hl_hhld_inc12 = "P151H013",
                                               hl_hhld_inc13 = "P151H014",
                                               hl_hhld_inc14 = "P151H015",
                                               hl_hhld_inc15 = "P151H016",
                                               hl_hhld_inc16 = "P151H017",
                                               nhwh_hhld_inc = "P151I001",
                                               nhwh_hhld_inc1 = "P151I002",
                                               nhwh_hhld_inc2 = "P151I003",
                                               nhwh_hhld_inc3 = "P151I004",
                                               nhwh_hhld_inc4 = "P151I005",
                                               nhwh_hhld_inc5 = "P151I006",
                                               nhwh_hhld_inc6 = "P151I007",
                                               nhwh_hhld_inc7 = "P151I008",
                                               nhwh_hhld_inc8 = "P151I009",
                                               nhwh_hhld_inc9 = "P151I010",
                                               nhwh_hhld_inc10 = "P151I011",
                                               nhwh_hhld_inc11 = "P151I012",
                                               nhwh_hhld_inc12 = "P151I013",
                                               nhwh_hhld_inc13 = "P151I014",
                                               nhwh_hhld_inc14 = "P151I015",
                                               nhwh_hhld_inc15 = "P151I016",
                                               nhwh_hhld_inc16 = "P151I017",
                                               r0 = "P004001"),
                                 state = st,
                                 sumfile = "sf3",
                                 output = "wide",
                                 year = 2000)
  
  ## add state identifying variable ##
  census2000.sf$state <- st
  census2000.lf$state <- st
  
  ## store the data frame in the list ## 
  state.tracts.2000.sf[[state.counter]] <- census2000.sf
  state.tracts.2000.lf[[state.counter]] <- census2000.lf
  
  ## increase interval by 1 ## 
  state.counter <- state.counter + 1
  
}

## combine all the data ## 
all.tracts.2000.sf <- bind_rows(state.tracts.2000.sf)
all.tracts.2000.lf <- bind_rows(state.tracts.2000.lf)

##
## merge data sets ##
##

all.tracts.2000.cb.m <- stata.merge(all.tracts.2000.sf,
                                    all.tracts.2000.lf,
                                    c("GEOID","state"))

## check merge ## 
paste("2000 TRACTS MERGE CHECK")
table(all.tracts.2000.cb.m$merge.variable, useNA = "ifany")

## clean ##
all.tracts.2000.cb <- all.tracts.2000.cb.m %>%
  select(-merge.variable, -NAME.x, -NAME.y) %>%
  mutate(r0 = r0/100)

## save the 2000 Census data ## 
save(all.tracts.2000.cb,
     file= paste(output_path,
                 "/001_2000_trts_cb.Rda", 
                 sep=""))

##
## 2000 counties downloaded via Social Explorer ##
## (these files already combine short and long form data) ##
##

all.counties.2000.in <- read.csv("census2000_counties.csv",
                                 header=T,
                                 stringsAsFactors = F)

all.counties.2000 <- all.counties.2000.in %>%
  mutate(GEOID = str_pad(Geo_FIPS, 5, "left", "0")) %>%
  rename(pop_total = SE_T001_001,
         pop_nh = SE_T017_002,
         pop_nh_white = SE_T017_003,
         pop_nh_black = SE_T017_004,
         pop_nh_aian = SE_T017_005,
         pop_nh_asian = SE_T017_006,
         pop_nh_nhpi = SE_T017_007,
         pop_nh_other = SE_T017_008,
         pop_nh_multi = SE_T017_009,
         pop_hl = SE_T017_010,
         median_hhld_inc = SE_T093_001) %>%
  select(GEOID,
         pop_total,
         pop_nh,
         pop_nh_white,
         pop_nh_black,
         pop_nh_aian,
         pop_nh_asian,
         pop_nh_nhpi,
         pop_nh_other,
         pop_nh_multi,
         pop_hl,
         median_hhld_inc)

## need to add Broomfield, CO ## 

broomfield.2000.in <- read.csv("broomfield_co_2000.csv",
                               header=T,
                               stringsAsFactors = F)

broomfield.2000 <- broomfield.2000.in %>%
  mutate(GEOID = "08014") %>%
  rename(pop_total = SE_T001_001,
         pop_nh = SE_T015_002,
         pop_nh_white = SE_T015_003,
         pop_nh_black = SE_T015_004,
         pop_nh_aian = SE_T015_005,
         pop_nh_asian = SE_T015_006,
         pop_nh_nhpi = SE_T015_007,
         pop_nh_other = SE_T015_008,
         pop_nh_multi = SE_T015_009,
         pop_hl = SE_T015_010,
         median_hhld_inc = SE_T093_001) %>%
  select(GEOID,
         pop_total,
         pop_nh,
         pop_nh_white,
         pop_nh_black,
         pop_nh_aian,
         pop_nh_asian,
         pop_nh_nhpi,
         pop_nh_other,
         pop_nh_multi,
         pop_hl,
         median_hhld_inc)

all.counties.2000.cb <- rbind(all.counties.2000,
                              broomfield.2000)

## save file ##
save(all.counties.2000.cb,
     file= paste0(output_path,
                  "001_2000_counties_cb.Rda"))

#################
## 2010 Census ##
#################

## View Census 2010 and ACS 2008-2012 5-year estimates ##
v2010 <- load_variables(2010, "sf1", cache=TRUE)
v2012 <- load_variables(2012, "acs5", cache=TRUE)

## initialize lists to store data frames ##
state.tracts.2010.census <- list() 
state.tracts.2010.acs <- list()

## initialize counter ## 
state.counter <- 1

## start the loop ##
for (st in states){
  
  ## pull relevant variables for 2010 Census by state and tract ## 
  
  census2010 <- get_decennial(geography = "tract",
                              variables = c(pop_total = "P001001",
                                            hisp_race_den = "P005001",
                                            pop_nh = "P005002",
                                            pop_nh_white = "P005003",
                                            pop_nh_black = "P005004",
                                            pop_nh_aian = "P005005",
                                            pop_nh_asian = "P005006",
                                            pop_nh_nhpi = "P005007",
                                            pop_nh_other = "P005008",
                                            pop_nh_multi = "P005009",
                                            pop_hl = "P005010",
                                            pop_hl_white = "P005011",
                                            pop_hl_black = "P005012",
                                            pop_hl_aian = "P005013",
                                            pop_hl_asian = "P005014",
                                            pop_hl_nhpi = "P005015",
                                            pop_hl_other = "P005016",
                                            pop_hl_multi = "P005017",
                                            gq_den = "P029001",
                                            gq_num = "P029026",
                                            gq_tot = "P042001",
                                            gq_inst_pop = "P029027",
                                            hhs = "H014001"),
                                 state = st,
                                 sumfile = "sf1",
                                 output = "wide",
                                 year = 2010)
  
  ## pull relevant variables for 2008-2012 ACS by state and tract ## 
  
  acs2012 <- get_acs(geography = "tract",
                     variables = c(median_hhld_inc = "B19013_001",
                                   foreign_den = "B05002_001",
                                   foreign_born = "B05002_013",
                                   hhld_pov_den = "B17017_001",
                                   hhld_pov_num = "B17017_002",
                                   hhld_inc = "B19001_001",
                                   hhld_inc1 = "B19001_002",
                                   hhld_inc2 = "B19001_003",
                                   hhld_inc3 = "B19001_004",
                                   hhld_inc4 = "B19001_005",
                                   hhld_inc5 = "B19001_006",
                                   hhld_inc6 = "B19001_007",
                                   hhld_inc7 = "B19001_008",
                                   hhld_inc8 = "B19001_009",
                                   hhld_inc9 = "B19001_010",
                                   hhld_inc10 = "B19001_011",
                                   hhld_inc11 = "B19001_012",
                                   hhld_inc12 = "B19001_013",
                                   hhld_inc13 = "B19001_014",
                                   hhld_inc14 = "B19001_015",
                                   hhld_inc15 = "B19001_016",
                                   hhld_inc16 = "B19001_017",
                                   wh_hhld_inc = "B19001A_001",
                                   wh_hhld_inc1 = "B19001A_002",
                                   wh_hhld_inc2 = "B19001A_003",
                                   wh_hhld_inc3 = "B19001A_004",
                                   wh_hhld_inc4 = "B19001A_005",
                                   wh_hhld_inc5 = "B19001A_006",
                                   wh_hhld_inc6 = "B19001A_007",
                                   wh_hhld_inc7 = "B19001A_008",
                                   wh_hhld_inc8 = "B19001A_009",
                                   wh_hhld_inc9 = "B19001A_010",
                                   wh_hhld_inc10 = "B19001A_011",
                                   wh_hhld_inc11 = "B19001A_012",
                                   wh_hhld_inc12 = "B19001A_013",
                                   wh_hhld_inc13 = "B19001A_014",
                                   wh_hhld_inc14 = "B19001A_015",
                                   wh_hhld_inc15 = "B19001A_016",
                                   wh_hhld_inc16 = "B19001A_017",
                                   bl_hhld_inc = "B19001B_001",
                                   bl_hhld_inc1 = "B19001B_002",
                                   bl_hhld_inc2 = "B19001B_003",
                                   bl_hhld_inc3 = "B19001B_004",
                                   bl_hhld_inc4 = "B19001B_005",
                                   bl_hhld_inc5 = "B19001B_006",
                                   bl_hhld_inc6 = "B19001B_007",
                                   bl_hhld_inc7 = "B19001B_008",
                                   bl_hhld_inc8 = "B19001B_009",
                                   bl_hhld_inc9 = "B19001B_010",
                                   bl_hhld_inc10 = "B19001B_011",
                                   bl_hhld_inc11 = "B19001B_012",
                                   bl_hhld_inc12 = "B19001B_013",
                                   bl_hhld_inc13 = "B19001B_014",
                                   bl_hhld_inc14 = "B19001B_015",
                                   bl_hhld_inc15 = "B19001B_016",
                                   bl_hhld_inc16 = "B19001B_017",
                                   aian_hhld_inc = "B19001C_001",
                                   aian_hhld_inc1 = "B19001C_002",
                                   aian_hhld_inc2 = "B19001C_003",
                                   aian_hhld_inc3 = "B19001C_004",
                                   aian_hhld_inc4 = "B19001C_005",
                                   aian_hhld_inc5 = "B19001C_006",
                                   aian_hhld_inc6 = "B19001C_007",
                                   aian_hhld_inc7 = "B19001C_008",
                                   aian_hhld_inc8 = "B19001C_009",
                                   aian_hhld_inc9 = "B19001C_010",
                                   aian_hhld_inc10 = "B19001C_011",
                                   aian_hhld_inc11 = "B19001C_012",
                                   aian_hhld_inc12 = "B19001C_013",
                                   aian_hhld_inc13 = "B19001C_014",
                                   aian_hhld_inc14 = "B19001C_015",
                                   aian_hhld_inc15 = "B19001C_016",
                                   aian_hhld_inc16 = "B19001C_017",
                                   as_hhld_inc = "B19001D_001",
                                   as_hhld_inc1 = "B19001D_002",
                                   as_hhld_inc2 = "B19001D_003",
                                   as_hhld_inc3 = "B19001D_004",
                                   as_hhld_inc4 = "B19001D_005",
                                   as_hhld_inc5 = "B19001D_006",
                                   as_hhld_inc6 = "B19001D_007",
                                   as_hhld_inc7 = "B19001D_008",
                                   as_hhld_inc8 = "B19001D_009",
                                   as_hhld_inc9 = "B19001D_010",
                                   as_hhld_inc10 = "B19001D_011",
                                   as_hhld_inc11 = "B19001D_012",
                                   as_hhld_inc12 = "B19001D_013",
                                   as_hhld_inc13 = "B19001D_014",
                                   as_hhld_inc14 = "B19001D_015",
                                   as_hhld_inc15 = "B19001D_016",
                                   as_hhld_inc16 = "B19001D_017",
                                   nhpi_hhld_inc =  "B19001E_001",
                                   nhpi_hhld_inc1 = "B19001E_002",
                                   nhpi_hhld_inc2 = "B19001E_003",
                                   nhpi_hhld_inc3 = "B19001E_004",
                                   nhpi_hhld_inc4 = "B19001E_005",
                                   nhpi_hhld_inc5 = "B19001E_006",
                                   nhpi_hhld_inc6 = "B19001E_007",
                                   nhpi_hhld_inc7 = "B19001E_008",
                                   nhpi_hhld_inc8 = "B19001E_009",
                                   nhpi_hhld_inc9 = "B19001E_010",
                                   nhpi_hhld_inc10 = "B19001E_011",
                                   nhpi_hhld_inc11 = "B19001E_012",
                                   nhpi_hhld_inc12 = "B19001E_013",
                                   nhpi_hhld_inc13 = "B19001E_014",
                                   nhpi_hhld_inc14 = "B19001E_015",
                                   nhpi_hhld_inc15 = "B19001E_016",
                                   nhpi_hhld_inc16 = "B19001E_017",
                                   oth_hhld_inc = "B19001F_001",
                                   oth_hhld_inc1 = "B19001F_002",
                                   oth_hhld_inc2 = "B19001F_003",
                                   oth_hhld_inc3 = "B19001F_004",
                                   oth_hhld_inc4 = "B19001F_005",
                                   oth_hhld_inc5 = "B19001F_006",
                                   oth_hhld_inc6 = "B19001F_007",
                                   oth_hhld_inc7 = "B19001F_008",
                                   oth_hhld_inc8 = "B19001F_009",
                                   oth_hhld_inc9 = "B19001F_010",
                                   oth_hhld_inc10 = "B19001F_011",
                                   oth_hhld_inc11 = "B19001F_012",
                                   oth_hhld_inc12 = "B19001F_013",
                                   oth_hhld_inc13 = "B19001F_014",
                                   oth_hhld_inc14 = "B19001F_015",
                                   oth_hhld_inc15 = "B19001F_016",
                                   oth_hhld_inc16 = "B19001F_017",
                                   tw_hhld_inc = "B19001G_001",
                                   tw_hhld_inc1 = "B19001G_002",
                                   tw_hhld_inc2 = "B19001G_003",
                                   tw_hhld_inc3 = "B19001G_004",
                                   tw_hhld_inc4 = "B19001G_005",
                                   tw_hhld_inc5 = "B19001G_006",
                                   tw_hhld_inc6 = "B19001G_007",
                                   tw_hhld_inc7 = "B19001G_008",
                                   tw_hhld_inc8 = "B19001G_009",
                                   tw_hhld_inc9 = "B19001G_010",
                                   tw_hhld_inc10 = "B19001G_011",
                                   tw_hhld_inc11 = "B19001G_012",
                                   tw_hhld_inc12 = "B19001G_013",
                                   tw_hhld_inc13 = "B19001G_014",
                                   tw_hhld_inc14 = "B19001G_015",
                                   tw_hhld_inc15 = "B19001G_016",
                                   tw_hhld_inc16 = "B19001G_017",
                                   hl_hhld_inc = "B19001I_001",
                                   hl_hhld_inc1 = "B19001I_002",
                                   hl_hhld_inc2 = "B19001I_003",
                                   hl_hhld_inc3 = "B19001I_004",
                                   hl_hhld_inc4 = "B19001I_005",
                                   hl_hhld_inc5 = "B19001I_006",
                                   hl_hhld_inc6 = "B19001I_007",
                                   hl_hhld_inc7 = "B19001I_008",
                                   hl_hhld_inc8 = "B19001I_009",
                                   hl_hhld_inc9 = "B19001I_010",
                                   hl_hhld_inc10 = "B19001I_011",
                                   hl_hhld_inc11 = "B19001I_012",
                                   hl_hhld_inc12 = "B19001I_013",
                                   hl_hhld_inc13 = "B19001I_014",
                                   hl_hhld_inc14 = "B19001I_015",
                                   hl_hhld_inc15 = "B19001I_016",
                                   hl_hhld_inc16 = "B19001I_017",
                                   nhwh_hhld_inc = "B19001H_001",
                                   nhwh_hhld_inc1 = "B19001H_002",
                                   nhwh_hhld_inc2 = "B19001H_003",
                                   nhwh_hhld_inc3 = "B19001H_004",
                                   nhwh_hhld_inc4 = "B19001H_005",
                                   nhwh_hhld_inc5 = "B19001H_006",
                                   nhwh_hhld_inc6 = "B19001H_007",
                                   nhwh_hhld_inc7 = "B19001H_008",
                                   nhwh_hhld_inc8 = "B19001H_009",
                                   nhwh_hhld_inc9 = "B19001H_010",
                                   nhwh_hhld_inc10 = "B19001H_011",
                                   nhwh_hhld_inc11 = "B19001H_012",
                                   nhwh_hhld_inc12 = "B19001H_013",
                                   nhwh_hhld_inc13 = "B19001H_014",
                                   nhwh_hhld_inc14 = "B19001H_015",
                                   nhwh_hhld_inc15 = "B19001H_016",
                                   nhwh_hhld_inc16 = "B19001H_017",
                                   hsuint = "B98001_002",
                                   uwhhs = "B00002_001"),
                     state = st,
                     survey = "acs5",
                     output = "wide",
                     year = 2012)
  
  ## add state identifying variable ##
  census2010$state <- st
  acs2012$state <- st
  
  ## store the data frame in the list ## 
  state.tracts.2010.census[[state.counter]] <- census2010
  state.tracts.2010.acs[[state.counter]] <- acs2012
  
  ## increase interval by 1 ## 
  state.counter <- state.counter + 1
  
}

## combine all the data ## 
all.tracts.2010.census <- bind_rows(state.tracts.2010.census)
all.tracts.2010.acs <- bind_rows(state.tracts.2010.acs)

## get rid of tract 36085009700 (all 0s)  ##
all.tracts.2010.census <- filter(all.tracts.2010.census, GEOID != "36085009700")

## correct GEOIDs according to Census guidance ## 
## https://www2.census.gov/geo/pdfs/reference/Geography_Notes.pdf ##

all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "04019002701"] <- "04019002704"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "04019002903"] <- "04019002906"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "04019410501"] <- "04019004118"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "04019410502"] <- "04019004121"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "04019410503"] <- "04019004125"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "04019470400"] <- "04019005200"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "04019470500"] <- "04019005300"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "06037930401"] <- "06037137000"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "36053940101"] <- "36053030101"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "36053940102"] <- "36053030102"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "36053940103"] <- "36053030103"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "36053940200"] <- "36053030200"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "36053940300"] <- "36053030300"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "36053940401"] <- "36053030401"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "36053940403"] <- "36053030403"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "36053940600"] <- "36053030600"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "36053940700"] <- "36053030402"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "36065940000"] <- "36065024800"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "36065940100"] <- "36065024700"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "36065940200"] <- "36065024900"
all.tracts.2010.census$GEOID[all.tracts.2010.census$GEOID == "36085008900"] <- "36085009700"

##
## merge both data sets ##
##

all.tracts.2010.cb.m <- stata.merge(all.tracts.2010.census,
                                    all.tracts.2010.acs,
                                    c("GEOID","state"))

## check merge ## 
paste("2020 TRACTS MERGE CHECK")
table(all.tracts.2010.cb.m$merge.variable)

## keep matches ##
all.tracts.2010.cbfm <- all.tracts.2010.cb.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable) %>%
  mutate(cfips = substr(GEOID,1,5))

##
## get county info (needed for sampling rate to calculate h4) ##
##

countyinc10 <- get_acs(geography = "county", 
                       variables = c(totpop = "B01003_001",
                                     totsel = "B98001_001",
                                     hsuint = "B98001_002",
                                     tothu = "B25001_001"),
                       survey = "acs5",
                       output = "wide",
                       year = 2012)

all.county.inc.2010.final <- countyinc10 %>%
  select(GEOID,
         ends_with("E")) %>%
  rename_at(.vars = vars(ends_with("E")),
            .funs = funs(sub("E", "", .))) %>%
  mutate(r0 = hsuint/tothu,
         cfips = GEOID) %>%
  select(cfips,
         r0)

##
## now, merge on county sampling rate to tracts ##
##

paste("IS 2010 TRACT FILE UNIQUE BY CFIPS?")
nrow(all.tracts.2010.cbfm) == length(unique(all.tracts.2010.cbfm$cfips))
paste("CLASS OF CFIPS VAR IN 2010 TRACTS FILE")
class(all.tracts.2010.cbfm$cfips)
paste("CFIPS CHECK IN 2010 TRACTS FILE")
range(nchar(trim(all.tracts.2010.cbfm$cfips)))

paste("IS 2010 COUNTY INC FILE UNIQUE BY FIPS?")
nrow(all.county.inc.2010.final) == length(unique(all.county.inc.2010.final$cfips))
paste("CLASS OF CFIPS VAR IN 2010 COUNTY INC FILE")
class(all.county.inc.2010.final$cfips)
paste("CFIPS CHECK IN 2010 COUNTY INC FILE")
range(nchar(trim(all.county.inc.2010.final$cfips)))

all.tracts.2010.cb.m <- stata.merge(all.tracts.2010.cbfm,
                                    all.county.inc.2010.final,
                                    "cfips")

## check merge ##
paste("MERGE OF 2010 TRACTS and 2010 COUNTY INC FILES")
table(all.tracts.2010.cb.m$merge.variable, useNA = "ifany")

## verified - these are counties in PR ## 
m2.pr.1 <- all.tracts.2010.cb.m %>%
  filter(merge.variable == 2) %>%
  select(cfips) %>%
  rename(GEOID = cfips) %>%
  inner_join(countyinc10, "GEOID") %>%
  filter(!grepl("Puerto Rico", NAME))

paste("ARE ALL THE UNMATCHED OBS IN PUERTO RICO?")
nrow(m2.pr.1) == 0

## keep matches, clean ##
all.tracts.2010.cb <- all.tracts.2010.cb.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable, 
         -NAME.x, 
         -NAME.y, 
         -ends_with("M", ignore.case = FALSE)) %>%
  rename_with(~str_remove(., 'E')) %>%
  rename(GEOID = GOID)

## save the 2010 Census data ## 
save(all.tracts.2010.cb,
     file= paste(output_path,
                 "/001_2010_trts_cb.Rda", 
                 sep=""))

##
## now, metro level vars ##
##

all.counties.census.2010 <- get_decennial(geography = "county",
                                          variables = c(pop_total = "P001001",
                                                        hisp_race_den = "P005001",
                                                        pop_nh = "P005002",
                                                        pop_nh_white = "P005003",
                                                        pop_nh_black = "P005004",
                                                        pop_nh_aian = "P005005",
                                                        pop_nh_asian = "P005006",
                                                        pop_nh_nhpi = "P005007",
                                                        pop_nh_other = "P005008",
                                                        pop_nh_multi = "P005009",
                                                        pop_hl = "P005010",
                                                        pop_hl_white = "P005011",
                                                        pop_hl_black = "P005012",
                                                        pop_hl_aian = "P005013",
                                                        pop_hl_asian = "P005014",
                                                        pop_hl_nhpi = "P005015",
                                                        pop_hl_other = "P005016",
                                                        pop_hl_multi = "P005017"),
                                          sumfile = "sf1",
                                          output = "wide",
                                          year = 2010)

all.counties.acs.2012 <- get_acs(geography = "county",
                                 variables = c(median_hhld_inc = "B19013_001"),
                                 survey = "acs5",
                                 output = "wide",
                                 year = 2012)

##
## merge files 
##

all.counties.2010.cb.m <- stata.merge(all.counties.census.2010,
                                      all.counties.acs.2012,
                                      "GEOID")

## check merge ##
paste("2010 COUNTY FILES MERGE")
table(all.counties.2010.cb.m$merge.variable, useNA = "ifany")

## keep matches ## 
all.counties.2010.cb <- all.counties.2010.cb.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable,
         -NAME.x,
         -NAME.y,
         -ends_with("M",ignore.case = FALSE)) %>%
  rename_with(~str_remove(., 'E')) %>%
  rename(GEOID = GOID)

## save the 2010 Census data ## 
save(all.counties.2010.cb,
     file= paste(output_path,
                "/001_2010_counties_cb.Rda", 
                sep=""))


###########################
## 2020 preliminary data ##
###########################

##
## first, tracts ##
##

all.tracts.census.2020.in <- read.csv("census2020_trt_prelim.csv",
                                      header=T,
                                      stringsAsFactors = F)

all.tracts.census.2020 <- all.tracts.census.2020.in %>%
  mutate(GEOID = str_pad(Geo_FIPS, 11, "left","0")) %>%
  rename(pop_total = SE_T003_001,
         pop_nh = SE_T004_001,
         pop_nh_white = SE_T004_003,
         pop_nh_black = SE_T004_005,
         pop_nh_aian = SE_T004_007,
         pop_nh_asian = SE_T004_009,
         pop_nh_nhpi = SE_T004_011,
         pop_nh_other = SE_T004_013,
         pop_nh_multi = SE_T004_015,
         pop_hl = SE_T004_017) %>%
  select(GEOID,
         pop_total,
         pop_nh,
         pop_nh_white,
         pop_nh_black,
         pop_nh_aian,
         pop_nh_asian,
         pop_nh_nhpi,
         pop_nh_other,
         pop_nh_multi,
         pop_hl)

##
## next, counties ##
##

all.counties.census.2020.in <- read.csv("census2020_county_prelim.csv",
                                        header=T,
                                        stringsAsFactors = F)
 
all.counties.census.2020 <- all.counties.census.2020.in %>%
  mutate(GEOID = str_pad(Geo_FIPS, 5, "left","0")) %>%
  rename(pop_total = SE_T003_001,
         pop_nh = SE_T004_001,
         pop_nh_white = SE_T004_003,
         pop_nh_black = SE_T004_005,
         pop_nh_aian = SE_T004_007,
         pop_nh_asian = SE_T004_009,
         pop_nh_nhpi = SE_T004_011,
         pop_nh_other = SE_T004_013,
         pop_nh_multi = SE_T004_015,
         pop_hl = SE_T004_017) %>%
  select(GEOID,
         pop_total,
         pop_nh,
         pop_nh_white,
         pop_nh_black,
         pop_nh_aian,
         pop_nh_asian,
         pop_nh_nhpi,
         pop_nh_other,
         pop_nh_multi,
         pop_hl)

##
## necessary fix for CT boundaries: https://www.federalregister.gov/documents/2022/06/06/2022-12063/change-to-county-equivalents-in-the-state-of-connecticut
## combine Hartford, CT and Tolland, CT counties to match 2022 ACS boundaries ## 
##

all.counties.census.2020$GEOID[all.counties.census.2020$GEOID == "09013"] <- "09003"

all.counties.census.2020.rd <- all.counties.census.2020 %>%
  group_by(GEOID) %>%
  summarize_if(is.numeric, sum, na.rm=T)

paste("OBS PRE FIX FOR 2022 CT CHANGE")
nrow(all.counties.census.2020)
paste("OBS POST FIX FOR 2022 CT CHANGE")
nrow(all.counties.census.2020.rd)

####################################
## 2018-2022 ACS 5-year estimates ##
####################################

## View ACS 2018-2022 5-year estimates ##
v2022 <- load_variables(2022, "acs5", cache = TRUE) 

## initialize list to store data frames ##
state.tracts.2022 <- list() 

## initialize counter ## 
state.counter <- 1

## start the loop ##
for (st in states){
  
  ## pull relevant variables for 2018-2022 ACS by state and tract ## 
  
  acs.2022 <- get_acs(geography = "tract",  
                      variables = c(pop_total_acs = "B01003_001",
                                    gq_den = "B09019_001",
                                    gq_num = "B09019_026",
                                    gq_tot = "B26001_001",
                                    gq_inst_pop = "B26101_067",
                                    hhs = "B25003_001",
                                    median_hhld_inc = "B19013_001",
                                    foreign_den = "B05002_001",
                                    foreign_born = "B05002_013",
                                    hhld_pov_den = "B17017_001",
                                    hhld_pov_num = "B17017_002",
                                    hhld_inc = "B19001_001",
                                    hhld_inc1 = "B19001_002",
                                    hhld_inc2 = "B19001_003",
                                    hhld_inc3 = "B19001_004",
                                    hhld_inc4 = "B19001_005",
                                    hhld_inc5 = "B19001_006",
                                    hhld_inc6 = "B19001_007",
                                    hhld_inc7 = "B19001_008",
                                    hhld_inc8 = "B19001_009",
                                    hhld_inc9 = "B19001_010",
                                    hhld_inc10 = "B19001_011",
                                    hhld_inc11 = "B19001_012",
                                    hhld_inc12 = "B19001_013",
                                    hhld_inc13 = "B19001_014",
                                    hhld_inc14 = "B19001_015",
                                    hhld_inc15 = "B19001_016",
                                    hhld_inc16 = "B19001_017",
                                    wh_hhld_inc = "B19001A_001",
                                    wh_hhld_inc1 = "B19001A_002",
                                    wh_hhld_inc2 = "B19001A_003",
                                    wh_hhld_inc3 = "B19001A_004",
                                    wh_hhld_inc4 = "B19001A_005",
                                    wh_hhld_inc5 = "B19001A_006",
                                    wh_hhld_inc6 = "B19001A_007",
                                    wh_hhld_inc7 = "B19001A_008",
                                    wh_hhld_inc8 = "B19001A_009",
                                    wh_hhld_inc9 = "B19001A_010",
                                    wh_hhld_inc10 = "B19001A_011",
                                    wh_hhld_inc11 = "B19001A_012",
                                    wh_hhld_inc12 = "B19001A_013",
                                    wh_hhld_inc13 = "B19001A_014",
                                    wh_hhld_inc14 = "B19001A_015",
                                    wh_hhld_inc15 = "B19001A_016",
                                    wh_hhld_inc16 = "B19001A_017",
                                    bl_hhld_inc = "B19001B_001",
                                    bl_hhld_inc1 = "B19001B_002",
                                    bl_hhld_inc2 = "B19001B_003",
                                    bl_hhld_inc3 = "B19001B_004",
                                    bl_hhld_inc4 = "B19001B_005",
                                    bl_hhld_inc5 = "B19001B_006",
                                    bl_hhld_inc6 = "B19001B_007",
                                    bl_hhld_inc7 = "B19001B_008",
                                    bl_hhld_inc8 = "B19001B_009",
                                    bl_hhld_inc9 = "B19001B_010",
                                    bl_hhld_inc10 = "B19001B_011",
                                    bl_hhld_inc11 = "B19001B_012",
                                    bl_hhld_inc12 = "B19001B_013",
                                    bl_hhld_inc13 = "B19001B_014",
                                    bl_hhld_inc14 = "B19001B_015",
                                    bl_hhld_inc15 = "B19001B_016",
                                    bl_hhld_inc16 = "B19001B_017",
                                    aian_hhld_inc = "B19001C_001",
                                    aian_hhld_inc1 = "B19001C_002",
                                    aian_hhld_inc2 = "B19001C_003",
                                    aian_hhld_inc3 = "B19001C_004",
                                    aian_hhld_inc4 = "B19001C_005",
                                    aian_hhld_inc5 = "B19001C_006",
                                    aian_hhld_inc6 = "B19001C_007",
                                    aian_hhld_inc7 = "B19001C_008",
                                    aian_hhld_inc8 = "B19001C_009",
                                    aian_hhld_inc9 = "B19001C_010",
                                    aian_hhld_inc10 = "B19001C_011",
                                    aian_hhld_inc11 = "B19001C_012",
                                    aian_hhld_inc12 = "B19001C_013",
                                    aian_hhld_inc13 = "B19001C_014",
                                    aian_hhld_inc14 = "B19001C_015",
                                    aian_hhld_inc15 = "B19001C_016",
                                    aian_hhld_inc16 = "B19001C_017",
                                    as_hhld_inc = "B19001D_001",
                                    as_hhld_inc1 = "B19001D_002",
                                    as_hhld_inc2 = "B19001D_003",
                                    as_hhld_inc3 = "B19001D_004",
                                    as_hhld_inc4 = "B19001D_005",
                                    as_hhld_inc5 = "B19001D_006",
                                    as_hhld_inc6 = "B19001D_007",
                                    as_hhld_inc7 = "B19001D_008",
                                    as_hhld_inc8 = "B19001D_009",
                                    as_hhld_inc9 = "B19001D_010",
                                    as_hhld_inc10 = "B19001D_011",
                                    as_hhld_inc11 = "B19001D_012",
                                    as_hhld_inc12 = "B19001D_013",
                                    as_hhld_inc13 = "B19001D_014",
                                    as_hhld_inc14 = "B19001D_015",
                                    as_hhld_inc15 = "B19001D_016",
                                    as_hhld_inc16 = "B19001D_017",
                                    nhpi_hhld_inc =  "B19001E_001",
                                    nhpi_hhld_inc1 = "B19001E_002",
                                    nhpi_hhld_inc2 = "B19001E_003",
                                    nhpi_hhld_inc3 = "B19001E_004",
                                    nhpi_hhld_inc4 = "B19001E_005",
                                    nhpi_hhld_inc5 = "B19001E_006",
                                    nhpi_hhld_inc6 = "B19001E_007",
                                    nhpi_hhld_inc7 = "B19001E_008",
                                    nhpi_hhld_inc8 = "B19001E_009",
                                    nhpi_hhld_inc9 = "B19001E_010",
                                    nhpi_hhld_inc10 = "B19001E_011",
                                    nhpi_hhld_inc11 = "B19001E_012",
                                    nhpi_hhld_inc12 = "B19001E_013",
                                    nhpi_hhld_inc13 = "B19001E_014",
                                    nhpi_hhld_inc14 = "B19001E_015",
                                    nhpi_hhld_inc15 = "B19001E_016",
                                    nhpi_hhld_inc16 = "B19001E_017",
                                    oth_hhld_inc = "B19001F_001",
                                    oth_hhld_inc1 = "B19001F_002",
                                    oth_hhld_inc2 = "B19001F_003",
                                    oth_hhld_inc3 = "B19001F_004",
                                    oth_hhld_inc4 = "B19001F_005",
                                    oth_hhld_inc5 = "B19001F_006",
                                    oth_hhld_inc6 = "B19001F_007",
                                    oth_hhld_inc7 = "B19001F_008",
                                    oth_hhld_inc8 = "B19001F_009",
                                    oth_hhld_inc9 = "B19001F_010",
                                    oth_hhld_inc10 = "B19001F_011",
                                    oth_hhld_inc11 = "B19001F_012",
                                    oth_hhld_inc12 = "B19001F_013",
                                    oth_hhld_inc13 = "B19001F_014",
                                    oth_hhld_inc14 = "B19001F_015",
                                    oth_hhld_inc15 = "B19001F_016",
                                    oth_hhld_inc16 = "B19001F_017",
                                    tw_hhld_inc = "B19001G_001",
                                    tw_hhld_inc1 = "B19001G_002",
                                    tw_hhld_inc2 = "B19001G_003",
                                    tw_hhld_inc3 = "B19001G_004",
                                    tw_hhld_inc4 = "B19001G_005",
                                    tw_hhld_inc5 = "B19001G_006",
                                    tw_hhld_inc6 = "B19001G_007",
                                    tw_hhld_inc7 = "B19001G_008",
                                    tw_hhld_inc8 = "B19001G_009",
                                    tw_hhld_inc9 = "B19001G_010",
                                    tw_hhld_inc10 = "B19001G_011",
                                    tw_hhld_inc11 = "B19001G_012",
                                    tw_hhld_inc12 = "B19001G_013",
                                    tw_hhld_inc13 = "B19001G_014",
                                    tw_hhld_inc14 = "B19001G_015",
                                    tw_hhld_inc15 = "B19001G_016",
                                    tw_hhld_inc16 = "B19001G_017",
                                    hl_hhld_inc = "B19001I_001",
                                    hl_hhld_inc1 = "B19001I_002",
                                    hl_hhld_inc2 = "B19001I_003",
                                    hl_hhld_inc3 = "B19001I_004",
                                    hl_hhld_inc4 = "B19001I_005",
                                    hl_hhld_inc5 = "B19001I_006",
                                    hl_hhld_inc6 = "B19001I_007",
                                    hl_hhld_inc7 = "B19001I_008",
                                    hl_hhld_inc8 = "B19001I_009",
                                    hl_hhld_inc9 = "B19001I_010",
                                    hl_hhld_inc10 = "B19001I_011",
                                    hl_hhld_inc11 = "B19001I_012",
                                    hl_hhld_inc12 = "B19001I_013",
                                    hl_hhld_inc13 = "B19001I_014",
                                    hl_hhld_inc14 = "B19001I_015",
                                    hl_hhld_inc15 = "B19001I_016",
                                    hl_hhld_inc16 = "B19001I_017",
                                    nhwh_hhld_inc = "B19001H_001",
                                    nhwh_hhld_inc1 = "B19001H_002",
                                    nhwh_hhld_inc2 = "B19001H_003",
                                    nhwh_hhld_inc3 = "B19001H_004",
                                    nhwh_hhld_inc4 = "B19001H_005",
                                    nhwh_hhld_inc5 = "B19001H_006",
                                    nhwh_hhld_inc6 = "B19001H_007",
                                    nhwh_hhld_inc7 = "B19001H_008",
                                    nhwh_hhld_inc8 = "B19001H_009",
                                    nhwh_hhld_inc9 = "B19001H_010",
                                    nhwh_hhld_inc10 = "B19001H_011",
                                    nhwh_hhld_inc11 = "B19001H_012",
                                    nhwh_hhld_inc12 = "B19001H_013",
                                    nhwh_hhld_inc13 = "B19001H_014",
                                    nhwh_hhld_inc14 = "B19001H_015",
                                    nhwh_hhld_inc15 = "B19001H_016",
                                    nhwh_hhld_inc16 = "B19001H_017"),
                      state = st,
                      output = "wide",
                      survey = "acs5",
                      year = 2022)
  
  ## add state identifying variable ##
  acs.2022$state <- st
  
  ## store the data frame in the list ## 
  state.tracts.2022[[state.counter]] <- acs.2022
  
  ## increase interval by 1 ## 
  state.counter <- state.counter + 1
  
}

##
## combine and process all the data ## 
##

all.tracts.acs.2022.temp <- bind_rows(state.tracts.2022)

all.tracts.acs.2022.rf <- all.tracts.acs.2022.temp %>%
  select(-ends_with("M",ignore.case = FALSE)) %>%
  rename_with(~str_remove(., 'E')) %>%
  rename(GEOID = GOID) %>%
  mutate(cfips = substr(GEOID,1,5))

##
## get county info (needed for sampling rate to create h4) ##
##

countyinc22 <- get_acs(geography = "county", 
                       variables = c(totpop = "B01003_001",
                                     totsel = "B98001_001",
                                     hsuint = "B98001_002",
                                     tothu = "B25001_001"),
                       survey = "acs5",
                       output = "wide",
                       year = 2022)

all.county.inc.2022.final <- countyinc22 %>%
  select(GEOID,
         ends_with("E")) %>%
  rename_at(.vars = vars(ends_with("E")),
            .funs = funs(sub("E", "", .))) %>%
  mutate(r0 = hsuint/tothu,
         cfips = GEOID) %>%
  select(cfips,
         r0)

##
## now, merge on county sampling rate to tracts ##
##

paste("IS 2022 TRACTS FILE UNIQUE BY CFIPS?")
nrow(all.tracts.acs.2022.rf) == length(unique(all.tracts.acs.2022.rf$cfips))
paste("CLASS OF CFIPS IN 2022 TRACT FILE")
class(all.tracts.acs.2022.rf$cfips)
paste("CFIPS CHECK IN 2022 TRACT FILE")
range(nchar(trim(all.tracts.acs.2022.rf$cfips)))

paste("IS 2022 COUNTY FILE UNIQUE BY CFIPS?")
nrow(all.county.inc.2022.final) == length(unique(all.county.inc.2022.final$cfips))
paste("CLASS OF CFIPS IN 2022 COUNTY FILE")
class(all.county.inc.2022.final$cfips)
paste("CFIPS CHECK IN 2022 COUNTY FILE")
range(nchar(trim(all.county.inc.2022.final$cfips)))

all.tracts.acs.2022.m2 <- stata.merge(all.tracts.acs.2022.rf,
                                      all.county.inc.2022.final,
                                      "cfips")

## check merge ##
paste("2022 COUNTY FILES MERGE")
table(all.tracts.acs.2022.m2$merge.variable, useNA = "ifany")


## verified - these are counties in Puerto Rico ## 
m2.pr.2 <- all.tracts.acs.2022.m2 %>%
  filter(merge.variable == 2) %>%
  select(cfips) %>%
  rename(GEOID = cfips) %>%
  inner_join(countyinc22, "GEOID") %>%
  filter(!grepl("Puerto Rico", NAME))

paste("ARE NON-MATCHING OBS ALL IN PUERTO RICO?")
nrow(m2.pr.2) == 0

## keep matches ##
all.tracts.acs.2022 <- all.tracts.acs.2022.m2 %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable) %>%
  rename(GEOID22 = GEOID)

##
## fix CT issue in 2022 data ##
## documentation: https://www.federalregister.gov/documents/2022/06/06/2022-12063/change-to-county-equivalents-in-the-state-of-connecticut
##

## crosswalk comes from CT Data Collaborative https://github.com/CT-Data-Collaborative/2022-tract-crosswalk ##

ct.xwalk.fix.in <- read.csv("2022tractcrosswalk.csv",
                            header = T,
                            stringsAsFactors = F)

ct.xwalk.fix <- ct.xwalk.fix.in %>%
  mutate(GEOID20 = str_pad(tract_fips_2020, 11, "left", "0"),
         GEOID22 = str_pad(Tract_fips_2022, 11, "left", "0"))

paste("IS THE CT XWALK UNIQUE BY 2020 GEOID?")
length(unique(ct.xwalk.fix$GEOID20)) == nrow(ct.xwalk.fix)
paste("IS THE CT XWALK UNIQUE BY 2022 GEOID?")
length(unique(ct.xwalk.fix$GEOID22)) == nrow(ct.xwalk.fix)
paste("2020 GEOID CHECK IN CT XWALK")
range(nchar(trim(ct.xwalk.fix$GEOID20)))
paste("2022 GEOID CHECK IN CT XWALK")
range(nchar(trim(ct.xwalk.fix$GEOID22)))

paste("IS 2022 TRACT FILE UNIQUE BY 2022 GEOID?")
length(unique(all.tracts.acs.2022$GEOID22)) == nrow(all.tracts.acs.2022)
paste("2022 GEOID CHECK in 2022 TRACT FILE")
range(nchar(trim(all.tracts.acs.2022$GEOID22)))

## merge files ##
all.tracts.acs.2022.rf.m <- stata.merge(all.tracts.acs.2022,
                                        ct.xwalk.fix,
                                        "GEOID22")

## check merge ##
paste("MERGE OF 2022 TRACT FILE AND CT XWALK")
table(all.tracts.acs.2022.rf.m$merge.variable, useNA = "ifany")

##
## apply fix ## 
##

all.tracts.acs.2022.rf <- all.tracts.acs.2022.rf.m %>%
  mutate(GEOID = case_when(merge.variable == 3 ~ GEOID20,
                           merge.variable == 1 ~ GEOID22)) %>%
  select(-merge.variable,
         -GEOID20,
         -GEOID22) %>%
  select(GEOID, 
         everything())

## check obs ##
paste("OBS IN 2022 TRACT FILE PRE CT XWALK FIX")
nrow(all.tracts.acs.2022)
paste("OBS IN 2022 TRACT FILE POST CT XWALK FIX")
nrow(all.tracts.acs.2022.rf)

##
## now, county level vars ##
##

all.counties.acs.2022 <- get_acs(geography = "county",
                             variables = c(pop_total_acs = "B01003_001",
                                           median_hhld_inc = "B19013_001"),
                             survey = "acs5",
                             output = "wide",
                             year = 2022)

##
## fix CT issue in 2022 data ##
## documentation: https://www.federalregister.gov/documents/2022/06/06/2022-12063/change-to-county-equivalents-in-the-state-of-connecticut
##

all.counties.acs.2022.rf <- all.counties.acs.2022 %>%
  select(-pop_total_acsM,
         -median_hhld_incM) %>%
  rename(pop_total_acs = pop_total_acsE,
         median_hhld_inc = median_hhld_incE) %>%
  mutate(GEOID = case_when(GEOID == "09110" ~ "09003",
                           GEOID == "09120" ~ "09001",
                           GEOID == "09190" ~ "09001",
                           GEOID == "09130" ~ "09007",
                           GEOID == "09140" ~ "09009",
                           GEOID == "09170" ~ "09009",
                           GEOID == "09150" ~ "09015",
                           GEOID == "09160" ~ "09005",
                           GEOID == "09180" ~ "09011",
                           TRUE ~ GEOID))

all.counties.acs.2022.rd <- all.counties.acs.2022.rf %>%
  group_by(GEOID) %>%
  mutate(pop_total_msa = sum(pop_total_acs, na.rm=T),
         msa_wt = pop_total_acs/pop_total_msa) %>%
  summarize_at(vars(median_hhld_inc),
               funs(weighted.mean(., msa_wt, na.rm=T)))

paste("OBS IN 2022 COUNTY FILE PRE CT XWALK FIX")
nrow(all.counties.acs.2022.rf)
paste("OBS IN 2022 COUNTY FILE POST CT XWALK FIX")
nrow(all.counties.acs.2022.rd)

##
## merge 2020 and 2022 tracts ##
##

## pre-merge checks ##
paste("GEOID CHECK IN 2020 TRACT FILE")
range(nchar(trim(all.tracts.census.2020$GEOID)))
paste("CLASS OF GEOID IN 2020 TRACT FILE")
class(all.tracts.census.2020$GEOID)
paste("IS 2020 TRACT FILE UNIQUE BY GEOID?")
length(unique(all.tracts.census.2020$GEOID)) == nrow(all.tracts.census.2020)

paste("GEOID CHECK IN 2022 TRACT FILE")
range(nchar(trim(all.tracts.acs.2022.rf$GEOID)))
paste("CLASS OF GEOID IN 2022 TRACT FILE")
class(all.tracts.acs.2022.rf$GEOID)
paste("IS 2022 TRACT FILE UNIQUE BY GEOID?")
length(unique(all.tracts.acs.2022.rf$GEOID)) == nrow(all.tracts.acs.2022.rf)

## merge tracts ##
all.tracts.2020.cb.m <- stata.merge(all.tracts.census.2020,
                                    all.tracts.acs.2022.rf,
                                    "GEOID")

## check merge ## 
paste("2020 TRACT FILES MERGE")
table(all.tracts.2020.cb.m$merge.variable)

## verify that the non-matching tracts have 0 population ##
nm1 <- filter(all.tracts.2020.cb.m, merge.variable == 1)
paste("FIRST SET OF NON-MATCHES HAVE 0 POP")
sum(nm1$pop_total_2020) == 0

nm2 <- filter(all.tracts.2020.cb.m, merge.variable == 2)
paste("SECOND SET OF NON-MATCHES HAVE 0 POP")
sum(nm2$pop_total_acs) == 0

## keep matches ## 
all.tracts.2020.cb <- all.tracts.2020.cb.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable) 

## save merged 2020 tracts ## 
save(all.tracts.2020.cb,
     file= paste(output_path,
                "/001_2020_trts_cb.Rda", 
                sep=""))

##
## merge 2020 and 2022 counties ##
##

## pre-merge checks ##
paste("FIPS CHECK IN 2020 COUNTY FILE")
range(nchar(trim(all.counties.census.2020.rd$GEOID)))
paste("CLASS OF FIPS IN 2020 COUNTY FILE")
class(all.counties.census.2020.rd$GEOID)
paste("IS 2020 COUNTY FILE UNIQUE BY FIPS?")
length(unique(all.counties.census.2020.rd$GEOID)) == nrow(all.counties.census.2020.rd)

paste("FIPS CHECK IN 2022 COUNTY FILE")
range(nchar(trim(all.counties.acs.2022.rd$GEOID)))
paste("CLASS OF FIPS IN 2022 COUNTY FILE")
class(all.counties.acs.2022.rd$GEOID)
paste("IS 2022 COUNTY FILE UNIQUE BY FIPS?")
length(unique(all.counties.acs.2022.rd$GEOID)) == nrow(all.counties.acs.2022.rd)


all.counties.2020.cb.m <- stata.merge(all.counties.census.2020.rd,
                                      all.counties.acs.2022.rd,
                                      "GEOID")

## diagnose merge ## 
paste("2020 COUNTY FILES MERGE")
table(all.counties.2020.cb.m$merge.variable)

## check: non-matching obs are all in Puerto Rico ##
m2.counties.2020 <- all.counties.2020.cb.m %>%
  filter(merge.variable == 2)

paste("ARE ALL NON-MATCHING OBS IN PUERTO RICO?")
sum(substr(m2.counties.2020$GEOID,1,2) == "72") == nrow(m2.counties.2020)

## keep matches ## 
all.counties.2020.cb <- all.counties.2020.cb.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable) 

## save merged 2020 tracts ## 
save(all.counties.2020.cb,
     file= paste(output_path,
                 "/001_2020_counties_cb.Rda", 
                 sep=""))

#################################
## Census MSA delineation file ##
#################################

census.del.2020 <- read_excel(paste(input_path,
                                    "/list1_2020.xls",
                                    sep=""),
                              col_names = FALSE,
                              col_types = "text")

## data cleaning ## 
#cdt <- census.del.2020.temp <- census.del.2020[c(1,2,1920,1921,1922,1923),]
census.del.2020.temp <- census.del.2020[-c(1,2,1920,1921,1922,1923),]

## make this row the data frame header ## 
census.del.2020.cols <- as.vector(census.del.2020.temp[1,])
census.del.2020.temp2 <- census.del.2020.temp[-1,]
names(census.del.2020.temp2) <- census.del.2020.cols

## diagnostic checks ##
paste("CBSA CODE CHECK IN DELINEATION FILE")
range(nchar(trim(census.del.2020.temp2$`CBSA Code`)),na.rm=T)
paste("FIPS COUNTY CODE CHECK IN DELINEATION FILE")
range(nchar(trim(census.del.2020.temp2$`FIPS County Code`)),na.rm=T)
paste("FIPS STATE CODE CHECK IN DELINEATION FILE")
range(nchar(trim(census.del.2020.temp2$`FIPS State Code`)),na.rm=T)

## final MSA delineation file ## 
census.del.2020.final <- census.del.2020.temp2 %>%
  select(`CBSA Code`,
         `CBSA Title`,
         `Metropolitan/Micropolitan Statistical Area`,
         `FIPS State Code`,
         `FIPS County Code`) %>%
  mutate(FIPS = paste(`FIPS State Code`,`FIPS County Code`,sep=""))

## save delineation file ## 
save(census.del.2020.final,
     file= paste(output_path,
                 "/001_2020_Census_del.Rda", 
                sep=""))


## END OF PROGRAM ## 

sink()
sink(type = "message")

